# Packages this script attaches. Only the ones that are not yet installed are
# installed, so re-running the script does not reinstall everything each time.
required_packages <- c(
  "tm",
  "matrixStats", # For statistics
  "tidyverse",
  "readr",
  "tidytext",
  "stringr",
  "textdata",
  "chinese.misc",
  "jiebaR",
  "jiebaRD",
  "stm"
)
# system.file() returns "" for a package that is not installed.
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# library() stops with an error when a package cannot be attached, whereas
# require() only returns FALSE and lets the script fail later on.
# The attach order is kept as before so that masking between packages is
# unchanged.
library(tm)
library(matrixStats)
library(tidyverse)
library(readr)
library(tidytext)
library(stringr)
library(textdata)
library(chinese.misc)
library(jiebaR)
library(jiebaRD)
library(stm)

#------------------------------------------------------------------------------------------
## Method 5. Top Words
#------------------------------------------------------------------------------------------

# Read the four mobilization-campaign CSV files, one data frame per campaign.
Mob_India_1962   <- read_csv("mobilization_campaign_India_1962.csv")
Mob_Soviet_1969  <- read_csv("mobilization_campaign_Soviet_1969.csv")
Mob_Vietnam_1974 <- read_csv("mobilization_campaign_Vietnam_1974.csv")
Mob_Vietnam_1979 <- read_csv("mobilization_campaign_Vietnam_1979.csv")
# Only the first seven columns of the 1979 file are kept.
# NOTE(review): presumably so its columns line up with the other campaigns for
# rbind() -- confirm against the CSV files.
Mob_Vietnam_1979 <- Mob_Vietnam_1979[, seq_len(7)]

# Stack the campaigns row-wise and pull out the Text column.
Mob_Combined <- do.call(
  rbind,
  list(Mob_India_1962, Mob_Soviet_1969, Mob_Vietnam_1974, Mob_Vietnam_1979)
)
Mob_Ori <- Mob_Combined$Text

# Read the seven pacification-campaign CSV files, one data frame per campaign.
Pac_Japan_1990       <- read_csv("pacification_campaign_Japan_1990.csv")
Pac_Japan_1996       <- read_csv("pacification_campaign_Japan_1996.csv")
Pac_Japan_2005       <- read_csv("pacification_campaign_Japan_2005.csv")
Pac_Japan_2010       <- read_csv("pacification_campaign_Japan_2010.csv")
Pac_Japan_2012       <- read_csv("pacification_campaign_Japan_2012.csv")
Pac_Philippines_2016 <- read_csv("pacification_campaign_Philippines_2016.csv")
Pac_India_2017       <- read_csv("pacification_campaign_India_2017.csv")

# Stack the campaigns row-wise and pull out the Text column.
Pac_Combined <- do.call(
  rbind,
  list(
    Pac_Japan_1990, Pac_Japan_1996, Pac_Japan_2005,
    Pac_Japan_2010, Pac_Japan_2012, Pac_Philippines_2016,
    Pac_India_2017
  )
)
Pac_Ori <- Pac_Combined$Text

# Segmentation worker created with default settings; it is handed to every
# corp_or_dtm() call in this script as `mycutter`.
wk <- worker()

# ==================== Table 7.9 =====================================================

# Assemble the stop-word list from two sources: the list read from
# stop_words_list.txt and the list make_stoplist() returns for "jiebar".
# print = TRUE is passed to both calls.
added_stopwords <- make_stoplist("stop_words_list.txt", print = TRUE)
jieba_stopwords <- make_stoplist("jiebar", print = TRUE)
# NOTE(review): this variable shares its name with tm::stopwords(); calls to
# that function still resolve, but the overlap is easy to misread.
stopwords <- c(added_stopwords, jieba_stopwords)

# Top terms across all mobilization-campaign texts.
# Each text is passed through slim_text() with rm_place = FALSE. vapply() is
# used instead of an index loop so that an empty input yields character(0)
# rather than iterating over 1:0, and so the result is not grown element by
# element.
# NOTE(review): despite the variable name, the stop-word list is only supplied
# to corp_or_dtm() below; slim_text() is called without it.
Stopword_Removed_Mob <- vapply(
  Mob_Ori,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
# Build the document-term matrix from the in-memory vector, using the shared
# worker and stop-word list.
DTM_Stopword_Mob <- corp_or_dtm(
  Stopword_Removed_Mob,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Mob, top = 50)

# Top terms across all pacification-campaign texts.
# NOTE(review): Origin_token_Pac is not referenced in the visible part of the
# script; it is kept in case later code depends on it.
Origin_token_Pac <- seg_file(Pac_Ori, from = "v")
# Each text is passed through slim_text() with rm_place = FALSE. vapply() is
# used instead of an index loop so that an empty input yields character(0)
# rather than iterating over 1:0.
Stopword_Removed_Pac <- vapply(
  Pac_Ori,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
# Build the document-term matrix from the in-memory vector, using the shared
# worker and stop-word list.
DTM_Stopword_Pac <- corp_or_dtm(
  Stopword_Removed_Pac,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Pac, top = 50)

# Top terms for the India 1962 mobilization campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_India1962 <- vapply(
  Mob_India_1962$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_India1962 <- corp_or_dtm(
  Stopword_Removed_India1962,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_India1962, top = 50)

# Top terms for the Soviet 1969 mobilization campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Soviet1969 <- vapply(
  Mob_Soviet_1969$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Soviet1969 <- corp_or_dtm(
  Stopword_Removed_Soviet1969,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Soviet1969, top = 50)

# Top terms for the Vietnam 1974 mobilization campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Vietnam1974 <- vapply(
  Mob_Vietnam_1974$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Vietnam1974 <- corp_or_dtm(
  Stopword_Removed_Vietnam1974,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Vietnam1974, top = 50)

# Top terms for the Vietnam 1979 mobilization campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Vietnam1979 <- vapply(
  Mob_Vietnam_1979$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Vietnam1979 <- corp_or_dtm(
  Stopword_Removed_Vietnam1979,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Vietnam1979, top = 50)

# ==================== Table 7.10 =====================================================

# Top terms for the Japan 1990 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Japan1990 <- vapply(
  Pac_Japan_1990$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Japan1990 <- corp_or_dtm(
  Stopword_Removed_Japan1990,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Japan1990, top = 50)

# Top terms for the Japan 1996 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Japan1996 <- vapply(
  Pac_Japan_1996$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Japan1996 <- corp_or_dtm(
  Stopword_Removed_Japan1996,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Japan1996, top = 50)

# Top terms for the Japan 2005 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Japan2005 <- vapply(
  Pac_Japan_2005$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Japan2005 <- corp_or_dtm(
  Stopword_Removed_Japan2005,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Japan2005, top = 50)

# Top terms for the Japan 2010 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Japan2010 <- vapply(
  Pac_Japan_2010$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Japan2010 <- corp_or_dtm(
  Stopword_Removed_Japan2010,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Japan2010, top = 50)

# Top terms for the Japan 2012 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Japan2012 <- vapply(
  Pac_Japan_2012$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Japan2012 <- corp_or_dtm(
  Stopword_Removed_Japan2012,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Japan2012, top = 50)

# Top terms for the Philippines 2016 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_Philippines2016 <- vapply(
  Pac_Philippines_2016$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_Philippines2016 <- corp_or_dtm(
  Stopword_Removed_Philippines2016,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_Philippines2016, top = 50)

# Top terms for the India 2017 pacification campaign.
# vapply() replaces the index loop so that an empty Text column yields
# character(0) rather than iterating over 1:0.
Stopword_Removed_India2017 <- vapply(
  Pac_India_2017$Text,
  slim_text,
  character(1),
  rm_place = FALSE,
  USE.NAMES = FALSE
)
DTM_Stopword_India2017 <- corp_or_dtm(
  Stopword_Removed_India2017,
  from = "v",
  type = "dtm",
  enc = "auto",
  mycutter = wk,
  stop_word = stopwords,
  stop_pattern = NULL
)
sort_tf(DTM_Stopword_India2017, top = 50)